/******************************************************************************
Paper: The Impact of Welfare on Intergroup Relations
Author: Akshay Dixit

Survey: This .do file cleans data from the household survey for analysis.
******************************************************************************/

* Start from an empty session and switch off output paging
clear all
set more off

* Folder holding the survey data; the global "identity" is not defined in this
* file and is expected to be set before it runs
global data "$identity/data/survey"


*** Import data ***

use "$data/Household Survey V2.dta", clear

* Merge with village survey data
* Many households map to one village record. The nogenerate option skips the
* creation of the _merge indicator, so there is nothing to drop afterwards.

merge m:1 village using "$data/Village Survey V2_clean.dta", nogenerate

******************************************************************************

*** Independent variables ***

* Inspect the sample size and the raw state / land-ownership variables
count
tabulate state
tabulate ownagriland

* A reported land quantity of zero is treated as not owning agricultural land
replace ownagriland = 0 if q2p5_kharif == 0

* Main variables
generate telangana = (state == 2)
generate telangana_ownagriland = telangana * ownagriland

* Binary variable for scheduled caste
generate scheduled_caste = (q1p10 == 1)

* Land ownership (in acres)
	// Negative quantities are recoded to missing before conversion
replace q2p5_kharif = . if q2p5_kharif < 0
	// Starts at 0 for non-owners and missing for everyone else
generate land_area = 0 if ownagriland == 0
	// Convert by reporting unit; these run after the line above on purpose,
	// so a reported unit overrides the 0 assigned to non-owners
replace land_area = q2p5_kharif / 100 if q2p4_kharif == 1		// 100 Cent = 1 Acre
replace land_area = q2p5_kharif       if q2p4_kharif == 2		// 1 Acre = 1 Acre
replace land_area = q2p5_kharif / 40  if q2p4_kharif == 3		// 40 Guntha = 1 Acre

* Covariates
rename (q1p12 v_q2p1 v_q2p7) (hh_size village_population castes_in_village)
replace castes_in_village = 10 if castes_in_village == 0	// Fix data entry error

* Education
	// Map free-text "other" answers onto the numeric scale
replace q1p6 = 0 if q1p6_os == "No study"
replace q1p6 = 13 if inlist(q1p6_os, "B tec comlet", "B tech", "Bba", "Byek", "Ma med")
	// Code 98 is recoded to missing
replace q1p6 = . if q1p6 == 98
rename q1p6 education

* Welfare benefits

	// Amount variables for every scheme, and the two schemes (RBS and
	// Rythu Bharosa) that are excluded from the "other welfare" total
local welfare_vars q6p1c q6p2c q6p3c q6p4c q6p5c q6p6c q6p7c q6p8c q6p9c q6p10c ///
	q6p11c q6p12c q6p13c q6p14c q6p15c q6p16c q6p17c q6p18c q6p19c q6p23_1
local rbs_vars q6p7c q6p14c

	// Negative amounts are recoded to missing
foreach amount of local welfare_vars {
	replace `amount' = . if `amount' < 0
}

	/*
		A few data entry errors crept in with surveyors entering amounts in Rupees.
		These are fixed below.
	*/

	// Cap PM-KISAN amounts (benefits never exceed 6000)
replace q6p1c = 6000 if q6p1c > 6000 & q6p1c != .

	// Cap Rythu Bharosa amounts (benefits never exceed 7500)
replace q6p14c = 7500 if q6p14c > 7500 & q6p14c != .

	// Record-level corrections to RBS benefits
replace q6p7c = 20000  if key == "uuid:4fb4fa64-0e52-47e4-be94-6b0ca7728b6f"
replace q6p7c = 120000 if key == "uuid:be9d797e-8a1b-49ef-9149-e94e7cd65ce7"
replace q6p7c = 25000  if key == "uuid:7fdd3ddd-3bb4-417c-a0c7-d67d1e7cd5c4"
replace q6p7c = 30000  if key == "uuid:e303e460-9ddf-43ee-8e50-176f7ee00690"
replace q6p7c = 10000  if key == "uuid:470e7819-4bf4-4252-a360-64aa7e452159"

	// Health insurance benefits duplicated and listed multiple times under different schemes
replace q6p6c = 0 if inlist(key, "uuid:be9d797e-8a1b-49ef-9149-e94e7cd65ce7", "uuid:63ec331c-e306-4e45-83bf-820315210528", "uuid:7035f4b4-5f91-4441-981e-9270754a21e3")

	// Record-level corrections in other schemes
replace q6p8c  = 100000 if key == "uuid:1279cf8a-b1d9-4f37-9bbe-ef39de288f28"
replace q6p8c  = 100016 if key == "uuid:441baef2-2c67-4695-94b9-f004d0df868c"
replace q6p2c  = 60000  if key == "uuid:9dc7dc1b-0ad1-4d11-9df1-b67d5d047442"
replace q6p17c = 18000  if key == "uuid:20abb914-146b-43ec-b3d4-2886cdb2d16a"

	// Total welfare amount with cleaned data (rowtotal counts missing as 0)
egen welfare_amount = rowtotal(`welfare_vars')

	// Welfare net of RBS or Rythu Bharosa: same list, in the same order,
	// with those two variables removed
local non_rbs_vars : list welfare_vars - rbs_vars
egen other_welfare = rowtotal(`non_rbs_vars')

******************************************************************************

*** Het. effects by land inequality in the village ***

	// Negative codes are recoded to missing
foreach v of varlist v_q2p13 v_q2p14 {
	replace `v' = . if `v' < 0
}

	// Land-to-population ratio of caste owning a plurality of land
generate dominant_land_to_pop = v_q2p14 / v_q2p13

	// Median of the ratio across villages: collapse to one row per village
	// (village mean), read off the median, then restore the household data
preserve
	collapse (mean) dominant_land_to_pop, by(village)
	summarize dominant_land_to_pop, detail
	local village_median = r(p50)
restore

	// 1 when the ratio is below the village median, 0 otherwise;
	// left missing when the ratio itself is missing
generate low_land_inequality = (dominant_land_to_pop < `village_median') if dominant_land_to_pop != .

	// Inspect the villages falling in each group
foreach flag in 1 0 {
	codebook village if low_land_inequality == `flag'
}

******************************************************************************

*** Dependent variables ***

* Borrowing sources
	// q3p16_* are matched against source codes (5, 6, 7, 8) and q3p17_*
	// against code 1; each indicator is 1 if any of the eight entries matches
local source_vars q3p16_1 q3p16_2 q3p16_3 q3p16_4 q3p16_5 q3p16_6 q3p16_7 q3p16_8
local caste_vars  q3p17_1 q3p17_2 q3p17_3 q3p17_4 q3p17_5 q3p17_6 q3p17_7 q3p17_8

egen borrowed_rel         = anymatch(`source_vars'), values(5)
egen borrowed_caste       = anymatch(`caste_vars'),  values(1)
egen borrowed_friend      = anymatch(`source_vars'), values(6)
egen borrowed_moneylender = anymatch(`source_vars'), values(7)
egen borrowed_employer    = anymatch(`source_vars'), values(8)

/* Borrowing from caste member: Includes relatives, and anyone else the respondent 
identifies as of their caste */
generate borrowed_rel_caste = inlist(1, borrowed_rel, borrowed_caste)

	// As a fraction of the mean in the comparison state of Andhra Pradesh
quietly summarize borrowed_rel_caste if telangana == 0
local ap_borrow_mean = r(mean)
generate adj_borrowed_rel_caste = borrowed_rel_caste / `ap_borrow_mean'

* Donation to marginalized castes

rename q8p22 donation

* Inter-caste relations 

	// Orient variables so that higher values correspond to "better" outcomes
tab1 q5p4 q5p6 q5p7 q5p8 q5p9 q5p10 q5p11 q5p12 q5p13

generate share_meal        = 4 - q5p7
generate share_meal_others = 4 - q5p6

	// Values above 1 (which includes missing) are left missing
generate friends_same_caste = q5p10 if q5p10 <= 1

generate caste_conflict = inlist(q5p13, 1, 2)

* Inter-caste attitudes

	// Orient variables so that higher values correspond to "better" outcomes
	// Codes 2 and 3 of q5p9 are swapped first; any other value stays missing
generate q5p9_recoded = cond(q5p9 == 1, 1, cond(q5p9 == 3, 2, cond(q5p9 == 2, 3, .)))
generate caste_neighbor = 4 - q5p9_recoded

generate caste_trust            = 4 - q5p11
generate caste_neighbor_others  = 4 - q5p8
generate castes_live_separately = q5p4

* Festival spending

	// Code missing values appropriately: negative amounts become missing, and
	// the 3 outliers above 100000 (data entry errors) are dropped as well
generate festival_spending = q3p6 if inrange(q3p6, 0, 100000)

	// As a fraction of the mean in the comparison state of Andhra Pradesh
quietly summarize festival_spending if telangana == 0
local ap_festival_mean = r(mean)
generate adj_festival_spending = festival_spending / `ap_festival_mean'

* Perceptions of inequality

generate growing_inequality   = 4 - q5p17
generate inequality_increased = (q5p17 == 1)

generate demand_social_insurance = q6p24
replace demand_social_insurance = . if demand_social_insurance < 0

******************************************************************************

*** Respondent characteristics ***

* Age
g age = q1p4
g age_squared = q1p4 * q1p4
summ age, d
	// Stata ranks missing above every number, so an unguarded (age > median)
	// would code respondents with missing age as older; keep them missing
g older = (age > r(p50)) if !missing(age)

* Gender
	// NOTE(review): a missing q1p7 is coded 0 here - confirm that is intended
g female = (q1p7 == 2)

* Education
tab education, sort
summ education, d
	// education is set to missing for code 98 during cleaning; without the
	// guard those respondents would satisfy (education >= 5) and be coded 1
g primary_edu = (education >= 5) if !missing(education)
tab primary_edu
	/*
		Modal respondent (48.5%) never went to school. Median years of education is 2. 
		45% respondents completed primary education.
		(Figures noted before missing education was excluded from primary_edu;
		re-check against the tabulation above.)
	*/

* Head of household
	// NOTE(review): a missing q1p3 is coded 0 here - confirm that is intended
g hh_head = (q1p3 == 1)

* Token game to measure state identity
	// Negative codes are recoded to missing
g tokens_caste_member = q5p2a 
replace tokens_caste_member = . if tokens_caste_member < 0

g tokens_family_member = q5p1a
replace tokens_family_member = . if tokens_family_member < 0

******************************************************************************

*** Label variables ***

	// Treatment variables
label variable telangana_ownagriland "Telangana * Own land"
label variable telangana "Telangana"
label variable ownagriland "Own land"

	// Outcomes
label variable borrowed_rel_caste "Borrowed from caste member"
label variable adj_borrowed_rel_caste "Borrowed from caste member (fraction of AP)"
label variable donation "Donation (Indian Rupees)"
label variable share_meal "Share meals with other castes (1-3 scale)"
label variable share_meal_others "People in village share meals with other castes (1-3 scale)"
label variable friends_same_caste "Most/all friends from the same caste"
label variable caste_conflict "Caste conflict in village"
label variable festival_spending "Festival spending in past year (Indian Rupees)"
label variable adj_festival_spending "Festival spending in past year (Fraction of AP)"
label variable caste_neighbor "Willing to have other caste as neighbor (1-3 scale)"
label variable caste_neighbor_others "People in village willing to have other caste as neighbor (1-3 scale)"
label variable caste_trust "People of other castes can be trusted (1-3 scale)"

	// Covariates
label variable hh_size "Household size"
label variable village_population "Village population"
label variable castes_in_village "No. of castes in village"
label variable other_welfare "Other welfare benefits"

	// Variable labelled erroneously: with no label name given, this detaches
	// the value label from every variable matching q3p20_percent*
label values q3p20_percent*

******************************************************************************

	// Write the cleaned dataset, then empty memory
save "$data/Household Survey V2_clean.dta", replace
clear

